#8/12/2015
#replication materials for Spirling "Democratization and Linguistic Complexity", JOP

rm(list=ls())

#Table 4 is the 'main' set of regression results
#contains two models: one with several variables, second wiht just the interaction
#Figure 7 is the estimated marginal effect of the reform act interaction.

#load the data
#setwd("C:/Users/as9934/Dropbox/complexity/August2015/JOP_replication_data/")
load("bigframe.rdata")

############################
### Table 4 ################
############################


#make the 1868_1 dummy
reform.act <- rep(0, times=length(big.frame$year.dummy))
m <- match("1868_1", big.frame$year.dummy)
reform.act[m:length(big.frame$year.dummy)] <- 1
big.frame$reform.act <- reform.act

#do an lm with only the interaction
mod.int <- lm(FK_read.ease~   cabinet*reform.act, data= big.frame)

#do an lm with the controls, too
mod <- lm(FK_read.ease~ party + cabinet +word.count +competitiveness + cabinet*reform.act, data= big.frame)



#clustered SEs (via Drew Dimmery's code)
cl   <- function(dat,fm, cluster){
  require(sandwich, quietly = TRUE)
  require(lmtest, quietly = TRUE)
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- fm$rank
  dfc <- (M/(M-1))*((N-1)/(N-K))
  uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
  vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
  coeftest(fm, vcovCL) }

# apply the 'cl' function , cluster on mp
cat("---------------------------------\n")
cat("Simple model: interaction only\n")
mod.clus.reform.int <- cl(big.frame, mod.int, big.frame$mp_code)
print(mod.clus.reform.int)
cat("\n R^2=",summary(mod.int)$r.squared,"\n")
cat("\n adj R^2=",summary(mod.int)$adj.r.squared,"\n")


cat("---------------------------------\n")
cat("Model with controls\n")
mod.clus.reform <-cl(big.frame, mod, big.frame$mp_code)
print(mod.clus.reform)
cat("\n R^2=",summary(mod)$r.squared,"\n")
cat("\n adj R^2=",summary(mod)$adj.r.squared,"\n")

#############################
### Figure 7 ################
#############################

#do the robust SEs in slightly different way (again via Dimmery code)
#for convenience when passing to another function

#this is basically the Dimmery function, just written slightly
#differently
robust.se <- function(model, cluster){
  require(sandwich)
  require(lmtest)
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- model$rank
  dfc <- (M/(M - 1)) * ((N - 1)/(N - K))
  uj <- apply(estfun(model), 2, function(x) tapply(x, cluster, sum));
  rcse.cov <- dfc * sandwich(model, meat = crossprod(uj)/N)
  rcse.se <- coeftest(model, rcse.cov)
  return(list(rcse.cov, rcse.se))
}

clustervar<-mapply(paste,"MP.",big.frame$mp_code, sep="")
mod2int <- mod.int
mod2int$coeftest <- robust.se(mod.int,clustervar)[[2]]
mod2int$se <- robust.se(mod.int,clustervar)[[2]][,2]

#make robust SEs
vcov.cluster <- robust.se(mod.int,clustervar)[[1]]


#use Anton strezhnev's code for interaction plot (modulo some clean up on plotting
# and handling of SEs.)


interaction_plot_binary <- function(model, effect, moderator, interaction, varcov="default", conf=.95, title="Marginal effects plot", xlabel="Value of moderator", ylabel="Estimated marginal coefficient", factor_labels=c(0,1)){
  
  # Extract Variance Covariance matrix
  ifelse(varcov == "default", covMat <- vcov(model), covMat <- varcov)
  
  # Extract the data frame of the model
  mod_frame = model.frame(model)
  
  # Get coefficients of variables
  beta_1 = model$coefficients[[effect]]
  beta_3 = model$coefficients[[interaction]]
  
  # Create list of moderator values at which marginal effect is evaluated
  x_2 <- c(0,1)
  
  # Compute marginal effects
  delta_1 = beta_1 + beta_3*x_2
  
  # Compute variances
  var_1 = covMat[effect,effect] + (x_2^2)*covMat[interaction, interaction] + 2*x_2*covMat[effect, interaction]
  
  # Standard errors
  se_1 = sqrt(var_1)
  
  # Upper and lower confidence bounds
  z_score = qnorm(1 - ((1 - conf)/2))
  upper_bound = delta_1 + z_score*se_1
  lower_bound = delta_1 - z_score*se_1
  
  # Determine the bounds of the graphing area
  max_y = max(upper_bound)
  min_y = min(lower_bound)
  
  # Initialize plotting window
  plot(x=c(), y=c(), ylim=c(min_y, max_y), xlim=c(-.5, 1.5), xlab=xlabel, ylab=ylabel, main=title, xaxt="n")
  
  # Plot lines of confidence intervals
  #when moderator==0
  arrows(x_2[1],lower_bound[1], x_2[1],upper_bound[1], code=3, angle=90, lwd=2)
  
  #when moderator == 1
  arrows(x_2[2],lower_bound[2], x_2[2],upper_bound[2], code=3, angle=90, lwd=2)
  
  # Plot points of estimated effects
  points(x=x_2, y=delta_1, pch=21, col="black", bg="blue", cex=2)
  
  # Label the axis
  axis(side=1, at=c(0,1), labels=factor_labels)
  
  # Add a dashed horizontal line for zero
  abline(h=0, lty=3)
  
}


X11()
par(mfrow=c(1,1))
par(bg='cornsilk1')


#do the plot
interaction_plot_binary(mod.int, effect="cabinet1" , moderator="reform.act" , 
                        interaction="cabinet1:reform.act", varcov=vcov.cluster,
                        title="", xlabel="Reform Act", ylabel="Est marginal coef on Cabinet",
                        factor_labels=c("before","after"))
